Spatial Data Table

D Cooley

2018-05-15

library(microbenchmark)
library(spatialdatatable)
library(googleway)
## 
## Attaching package: 'googleway'
## The following objects are masked from 'package:spatialdatatable':
## 
##     decode_pl, encode_pl
library(data.table)
library(geosphere) ## for compariing results

Haversine Distance

n <- 10000
set.seed(20170511)
lats <- -90:90
lons <- -180:180
dt <- data.table::data.table(lat1 = sample(lats, size = n, replace = T),
                             lon1 = sample(lons, size = n, replace = T),
                             lat2 = sample(lats, size = n, replace = T),
                             lon2 = sample(lons, size = n, replace = T))

dt1 <- copy(dt)
dt2 <- copy(dt)

microbenchmark(
    sdt = { dt1[, dtDistance := dtHaversine(lat1, lon1, lat2, lon2)]  },
    geo = { dt2[, geoDistance := distHaversine(matrix(c(lon1, lat1), ncol = 2),
                                   matrix(c(lon2, lat2), ncol = 2))]  }
)
## Unit: milliseconds
##  expr      min       lq     mean   median       uq       max neval
##   sdt 1.277019 1.357861 1.522743 1.425849 1.538868  7.362467   100
##   geo 2.621767 3.100185 4.365120 3.528062 4.575625 17.145586   100
dt1
##        lat1 lon1 lat2 lon2 dtDistance
##     1:   21 -172   55  -42   10330336
##     2:   89   55   66   76    2565164
##     3:  -76  -28   47   64   15086389
##     4:   56 -118   12   56   12433274
##     5:   86 -137  -36   16   14404375
##    ---                               
##  9996:  -32 -158   83  -24   14089513
##  9997:  -52   78  -86  -84    4650145
##  9998:    1  102   47  -78   14677751
##  9999:  -60  -55  -26    4    5818964
## 10000:   10 -158  -72  126   10591494

Comparison of distance calculations

n <- 10000
set.seed(20170511)
lats <- -90:90
lons <- -180:180
dt <- data.table::data.table(lat1 = sample(lats, size = n, replace = T),
                             lon1 = sample(lons, size = n, replace = T),
                             lat2 = sample(lats, size = n, replace = T),
                             lon2 = sample(lons, size = n, replace = T))

dt[, idx := .I]
dt[, distEuclid := dtEuclidean(lat1, lon1, lat2, lon2)]
dt[, distHaversine := dtHaversine(lat1, lon1, lat2, lon2)]
dt[, distCosine := dtCosine(lat1, lon1, lat2, lon2)]

Bearing

n <- 10000
set.seed(20170511)
lats <- -90:90
lons <- -180:180
dt <- data.table::data.table(lat1 = sample(lats, size = n, replace = T),
                             lon1 = sample(lons, size = n, replace = T),
                             lat2 = sample(lats, size = n, replace = T),
                             lon2 = sample(lons, size = n, replace = T))

dt1 <- copy(dt)
dt2 <- copy(dt)

microbenchmark(
    sdt = { dt1[, dtBearing := dtBearing(lat1, lon1, lat2, lon2)]  },
    geo = { dt2[, geoBearing := bearing(matrix(c(lon1, lat1), ncol = 2),
                                   matrix(c(lon2, lat2), ncol = 2))]  }
)
## Unit: milliseconds
##  expr       min        lq      mean    median       uq       max neval
##   sdt  1.192597  1.284276  1.518648  1.354093  1.41057  9.096703   100
##   geo 12.192028 12.521020 13.291837 12.910717 13.26148 21.067780   100
dt1
##        lat1 lon1 lat2 lon2     dtBearing
##     1:   21 -172   55  -42  2.610066e+01
##     2:   89   55   66   76  1.581615e+02
##     3:  -76  -28   47   64  7.728122e+01
##     4:   56 -118   12   56  6.322859e+00
##     5:   86 -137  -36   16  2.844239e+01
##    ---                                  
##  9996:  -32 -158   83  -24  6.278030e+00
##  9997:  -52   78  -86  -84 -1.781474e+02
##  9998:    1  102   47  -78 -9.410992e-08
##  9999:  -60  -55  -26    4  7.672887e+01
## 10000:   10 -158  -72  126 -1.624762e+02

Midpoint

n <- 10000
set.seed(20170511)
lats <- -90:90
lons <- -180:180
dt <- data.table::data.table(lat1 = sample(lats, size = n, replace = T),
                             lon1 = sample(lons, size = n, replace = T),
                             lat2 = sample(lats, size = n, replace = T),
                             lon2 = sample(lons, size = n, replace = T))

dt[, c("midLat", "midLon") := dtMidpoint(lat1, lon1, lat2, lon2)] 
dt
##        lat1 lon1 lat2 lon2    midLat     midLon
##     1:   21 -172   55  -42  58.71023 -134.12349
##     2:   89   55   66   76  77.53177   75.15296
##     3:  -76  -28   47   64 -18.46358   44.25426
##     4:   56 -118   12   56  67.66393   48.11442
##     5:   86 -137  -36   16  28.73039   13.57198
##    ---                                         
##  9996:  -32 -158   83  -24  31.05043 -151.44902
##  9997:  -52   78  -86  -84 -72.88747   75.75280
##  9998:    1  102   47  -78  67.00000  102.00000
##  9999:  -60  -55  -26    4 -46.60747  -16.33706
## 10000:   10 -158  -72  126 -35.22140 -173.80055

Simplifying Polylines

dt_melbourne <- copy(googleway::melbourne)
setDT(dt_melbourne)
object.size(dt_melbourne)
## 800944 bytes
## first simplification with a 10 metre tolerance
dt_melbourne[, polyline := SimplifyPolyline(polyline, distanceTolerance = 10, type = "complex"),
                         by = .(polygonId, pathId)]
object.size(dt_melbourne)
## 273344 bytes
google_map(key = mapKey) %>%
  add_polygons(data = dt_melbourne, polyline = "polyline", id = "polygonId")
reducedcomplex10

reducedcomplex10

dt_melbourne <- copy(googleway::melbourne)
setDT(dt_melbourne)

dt_melbourne[, polyline := SimplifyPolyline(polyline, distanceTolerance = 100, type = "complex"),
                         by = .(polygonId, pathId)]

object.size(dt_melbourne)
## 139080 bytes
google_map(key = mapKey) %>%
  add_polygons(data = dt_melbourne, polyline = "polyline", id = "polygonId")
reducedcomplex10

reducedcomplex10

dt_melbourne <- copy(googleway::melbourne)
setDT(dt_melbourne)

dt_melbourne[, polyline := SimplifyPolyline(polyline, distanceTolerance = 1000, type = "complex"),
                         by = .(polygonId, pathId)]

object.size(dt_melbourne)
## 89136 bytes
google_map(key = mapKey) %>%
  add_polygons(data = dt_melbourne, polyline = "polyline", id = "polygonId")
reducedcomplex10

reducedcomplex10

Nearest Points

library(spatialdatatable)
library(googleway)
library(data.table)

dt_stops <- as.data.table(tram_stops)
dt_route <- as.data.table(tram_route)

dt_nearest <- dtNearestPoints(
    dt1 = copy(dt_route)
    , dt2 = copy(dt_stops)
    , dt1Coords = c("shape_pt_lat", "shape_pt_lon")
    , dt2Coords = c("stop_lat","stop_lon")
    )


## create a polyline between the joined pairs of coordinates
# 
# dt_nearest[, polyline := gepaf::encodePolyline(data.frame(c(dt_nearest[, shape_pt_lat.x], dt_nearest[, stop_lat.y]),
#                                                                                                                   c(dt_nearest[, shape_pt_lon.x], dt_nearest[, stop_lon.y])))]


pl <- sapply(1:nrow(dt_nearest), function(x){
    lats <- dt_nearest[x, c(shape_pt_lat.x, stop_lat.y)]
    lons <- dt_nearest[x, c(shape_pt_lon.x, stop_lon.y)]
    polyline = encode_pl(lat = lats,lon = lons)
})


dt_nearest[, polyline := pl ]

# To plot a google map through `googleway` you need your own Google Maps API key and replace this line with your own key.
# mapKey <- "GOOGLE_MAP_KEY" 

# google_map(key = mapKey) %>%
#   #add_circles(data = dt_route, lat = "shape_pt_lat", lon = "shape_pt_lon", fill_colour = "#FF00FF", stroke_weight = 0) %>%
#   add_markers(data = dt_stops, lat = "stop_lat", lon = "stop_lon") %>%
#   add_polylines(data = dt_route, lat = "shape_pt_lat", lon = "shape_pt_lon") %>%
#   add_circles(data = dt_nearest, lat = "shape_pt_lat.x", lon = "shape_pt_lon.x", stroke_weight = 0, radius = 20) %>%
#   add_polylines(data = dt_nearest, polyline = "polyline", stroke_colour = "#000000")
#   #add_circles(data = dt_stops, lat = "stop_lat", lon = "stop_lon", fill_colour = "#00FF00", stroke_weight = 0)